Worst Possible Visualisation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
stop_search_2021_worst <- readr::read_csv(here::here("data","stop-search","2021-09","2021-09-metropolitan-stop-and-search.csv"))%>%
janitor::clean_names()
## Rows: 14319 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Type, Gender, Age range, Self-defined ethnicity, Officer-defined e...
## dbl (2): Latitude, Longitude
## lgl (4): Part of a policing operation, Policing operation, Outcome linked t...
## dttm (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
stop_search_2021_worst%>%
ggplot()+
geom_bar(aes(x = officer_defined_ethnicity,
fill = type))+
labs(title = "Number of Stop and Searches by Type and Ethnicity",
y = "",
x = "Ethnicity")

MET Police
library(readxl)
library(dplyr)
library(stringr)
# load 2021 September data
stop_search_2021 <- readr::read_csv(here::here("data","stop-search","2021-09","2021-09-metropolitan-stop-and-search.csv"))
## Rows: 14319 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Type, Gender, Age range, Self-defined ethnicity, Officer-defined e...
## dbl (2): Latitude, Longitude
## lgl (4): Part of a policing operation, Policing operation, Outcome linked t...
## dttm (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ward_population <- read_excel(path = here::here("data","/London-wards-2018_ESRI/CT0225_2011 Census - Age by ethnic group (based on CT0010) by sex - London HT wards.xlsx"),
sheet = "CT0225 - All usual residents",
skip = 11,
col_names = T,
range = "A11:VA674")%>%
janitor::clean_names()
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
for(i in 4:573){
if(!is.na(ward_population[1,i])){
temp <- ward_population[1,i]
}
else{
ward_population[1,i] <- temp
}
}
names(ward_population)[4:length(ward_population)] <- paste0(ward_population[1,],"_",ward_population[2,])[4:length(ward_population)]
ward_population <- ward_population%>%
janitor::clean_names()
ward_population <- ward_population[-c(1,2),]
ward_population <- ward_population%>%
# rename(area_code = x1,
# area_name = x2,
# total_population = x3)%>%
mutate(area_code = case_when(!is.na(x1) ~ str_split_fixed(ward_population$x1," ",2)[,1],
TRUE ~ str_split_fixed(ward_population$x2," ",2)[,1]),
.after = x1,
area_name = case_when(!is.na(x1) ~ str_split_fixed(ward_population$x1," ",2)[,2],
TRUE ~ str_split_fixed(ward_population$x2," ",2)[,2]),
population_total = x3)
ward_population <- subset(ward_population, select = -c(x1,x2,x3))
indx_black <- grepl('black', colnames(ward_population))
black_pop_total<-rowSums(data.frame(lapply(ward_population[which(indx_black)], as.numeric)))
ward_population_no_age <- ward_population%>%
mutate(black_population = black_pop_total,
population_total = as.numeric(population_total))%>%
select(area_code,
area_name,
population_total,
black_population)%>%
mutate(prc_black = black_population/population_total)
skimr::skim(stop_search_2021)
Data summary
| Name |
stop_search_2021 |
| Number of rows |
14319 |
| Number of columns |
15 |
| _______________________ |
|
| Column type frequency: |
|
| character |
8 |
| logical |
4 |
| numeric |
2 |
| POSIXct |
1 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| Type |
0 |
1.00 |
13 |
25 |
0 |
3 |
0 |
| Gender |
177 |
0.99 |
4 |
6 |
0 |
3 |
0 |
| Age range |
2035 |
0.86 |
5 |
8 |
0 |
5 |
0 |
| Self-defined ethnicity |
171 |
0.99 |
13 |
84 |
0 |
17 |
0 |
| Officer-defined ethnicity |
316 |
0.98 |
5 |
5 |
0 |
4 |
0 |
| Legislation |
0 |
1.00 |
30 |
55 |
0 |
4 |
0 |
| Object of search |
44 |
1.00 |
8 |
35 |
0 |
8 |
0 |
| Outcome |
0 |
1.00 |
6 |
31 |
0 |
6 |
0 |
Variable type: logical
| Part of a policing operation |
0 |
1 |
0 |
FAL: 14319 |
| Policing operation |
14319 |
0 |
NaN |
: |
| Outcome linked to object of search |
14319 |
0 |
NaN |
: |
| Removal of more than just outer clothing |
14319 |
0 |
NaN |
: |
Variable type: numeric
| Latitude |
1582 |
0.89 |
51.50 |
0.06 |
51.2 |
51.46 |
51.51 |
51.55 |
51.69 |
▁▁▅▇▁ |
| Longitude |
1582 |
0.89 |
-0.11 |
0.14 |
-1.5 |
-0.19 |
-0.10 |
-0.02 |
0.27 |
▁▁▁▇▆ |
Variable type: POSIXct
| Date |
0 |
1 |
2021-08-31 23:00:00 |
2021-09-30 22:59:00 |
2021-09-16 09:44:00 |
8643 |
library(leaflet)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.3, PROJ 7.2.1
library(ggplot2)
library(dplyr)
library(leaflet.extras)
# read in the shapefile, transform it into long lat format
wards <- st_read(here::here("data/London-wards-2018_ESRI/London_Ward_CityMerged.shp"))
## Reading layer `London_Ward_CityMerged' from data source
## `/Users/kazmernagy-betegh/Library/Mobile Documents/com~apple~CloudDocs/LBS/AM10_Data Visualisation and Storytelling/am10.mam2022/data/London-wards-2018_ESRI/London_Ward_CityMerged.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 633 features and 6 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## Projected CRS: OSGB 1936 / British National Grid
wards <- st_transform(wards,crs=4326)
# transform points to sf
stops_sf <- st_as_sf(stop_search_2021%>%select(Longitude, Latitude)%>%na.omit,coords = c('Longitude',"Latitude"), crs = st_crs(wards))
# intersection of polygons and points
stop_locations <- stops_sf %>%
mutate(intersection = as.integer(st_intersects(geometry, wards$geometry)),
area = if_else(is.na(intersection), '', wards$NAME[intersection]))
# split geometry in coordinates
stop_locations <- stop_locations%>%
mutate(X= st_coordinates(geometry)[,1],
Y= st_coordinates(geometry)[,2])
# join areas to stop search
stop_search_2021 <- left_join(stop_search_2021, stop_locations, by = c("Longitude" = "X", "Latitude" = "Y" ))
stop_search_2021_wards <- left_join(stop_search_2021, wards, by = c("area"= "NAME"))
stop_search_2021_wards <- stop_search_2021_wards%>%
rename(point_geometry = geometry.x,
geometry = geometry.y)
# stop_search_2021_wards <- stop_search_2021_wards%>%select(-c("geometry"))
stop_search_2021_wards_pop <- left_join(stop_search_2021_wards,ward_population_no_age, by = c("area" = "area_name"))
stop_search_2021_wards_pop <- stop_search_2021_wards_pop%>%
janitor::clean_names()
# stop_search_2021_wards <- st_transform(stop_search_2021_wards,crs=4326)
prc_balck_stops_per_area <- stop_search_2021_wards_pop%>%
filter(!is.na(area), area != "", !is.na(officer_defined_ethnicity))%>%
group_by(area, officer_defined_ethnicity)%>%
summarise(ethnic_stops = n())%>%
mutate(prc_ethnic_stops = ethnic_stops/sum(ethnic_stops))%>%
filter(officer_defined_ethnicity == "Black")
## `summarise()` has grouped output by 'area'. You can override using the `.groups` argument.
prc_balck_stops_per_area <- merge(prc_balck_stops_per_area, data.frame(wards$NAME), by.x = "area", by.y = "wards.NAME", all.y = T)
pal <- colorNumeric("OrRd", stop_locations$intersection)
map_london <- leaflet()%>%
addTiles(
options = tileOptions(minZoom = 10, maxZoom = 15)
)%>%
addControl("London Stop and Search Frequency", position = 'bottomleft')%>%
setMaxBounds(lng1 = -0.147949,
lng2 = -0.117949,
lat1 = 51.20775,
lat2 = 51.70775)%>%
addPolygons(data = wards,
color = 'blue',
fillOpacity = 0.05,
weight = 0.5,
fill = ,
popup = ~paste0(NAME," num. stops: ",stop_locations$intersection[stop_locations$area == NAME],
"; ","Black Population: ",round(stop_search_2021_wards_pop$prc_black[which(stop_search_2021_wards_pop$area == NAME)]*100,2),"%",
"; "))%>%
addHeatmap(group = "heat",
data = stop_locations%>%na.omit,
lng = ~as.numeric(stop_locations$X),
lat = ~as.numeric(stop_locations$Y),
intensity = stop_locations$intersection,
radius = 6,
minOpacity = 0.08,
max = 0.7,
gradient = "OrRd")%>%
addLegend(values = stop_locations$intersection%>%na.omit,
group = "heat",
pal = colorNumeric("OrRd",stop_locations$intersection),
title = "Number of Stop and Searches")
## Warning in stop_locations$area == NAME: longer object length is not a multiple
## of shorter object length
## Warning in stop_search_2021_wards_pop$area == NAME: longer object length is not
## a multiple of shorter object length
library(tidyr)
london_ethnic_dist <- data.frame(as.factor(c("White", "Black", "Asian", "Other")),
c(59.8,18.4,13.3, 8.4))
colnames(london_ethnic_dist) <- c("ethnicity", "prc")
plot1 <- stop_search_2021%>%
janitor::clean_names()%>%
filter(!is.na(officer_defined_ethnicity), !is.na(self_defined_ethnicity))%>%
group_by(officer_defined_ethnicity)%>%
summarise(num_stops = n())%>%
mutate(prc_stops = round(num_stops/sum(num_stops)*100,2))%>%
mutate(prc = c(18.4,13.3, 8.4, 59.8))%>%
pivot_longer(cols = 3:4, names_to = "type", names_repair = "unique", values_to = "prc")%>%
ggplot()+
geom_col(aes(y = reorder(officer_defined_ethnicity, prc),
x = prc,
fill = type),
position = "dodge")+
geom_text(aes(y = reorder(officer_defined_ethnicity,prc),
x = prc,
label = paste0(prc,"%"),
group = type),
position = position_dodge(width = 1),
fontface = 2)+
theme_minimal()+
theme(panel.grid.major = element_blank(),
plot.caption.position = "plot",
plot.caption = element_text(vjust = 2, hjust = 0))+
labs(title = "40% of Stop and Searches conducted on 13% of Londons population",
y = "",
x = "% of Stop and Search Conducted in 2021 September",
caption = "NOTE: Ethnicity Breakdown of London from Wikipedia")+
scale_fill_manual(values=c("skyblue", "tomato"),
name="% distribution",
labels=c("Ethnic Distribution of London", "Stop and Search Ethnic Distribution"))
plot1

plot2 <- stop_search_2021%>%
janitor::clean_names()%>%
filter(!is.na(officer_defined_ethnicity), !is.na(self_defined_ethnicity))%>%
mutate(self_id = case_when(grepl("Black",self_defined_ethnicity)~"Black",
grepl("White",self_defined_ethnicity)~"White",
grepl("Asian",self_defined_ethnicity)~"Asian",
TRUE ~ "Other"))%>%
pivot_longer(cols = c(self_id, officer_defined_ethnicity), names_to = "classificaiton_type", values_to = "ethnicity")%>%
group_by(classificaiton_type, ethnicity)%>%
summarise(num_stops = n())%>%
mutate(prc_stops = round(num_stops/sum(num_stops)*100,2))%>%
ggplot()+
geom_col(aes(y = reorder(ethnicity, prc_stops),
x = prc_stops,
fill = classificaiton_type),
position = "dodge")+
geom_text(aes(y = reorder(ethnicity,prc_stops),
x = prc_stops,
label = paste0(prc_stops,"%"),
group = classificaiton_type),
position = position_dodge(width = 1),
fontface = 2)+
theme_minimal()+
theme(panel.grid.major = element_blank(),
plot.caption.position = "plot",
plot.caption = element_text(vjust = 2, hjust = 0))+
labs(title = "Only 63% of People Identified as Black by Officers Self Identify as that",
y = "",
x = "% of Stop and Search Conducted in 2021 September")+
scale_fill_manual(values=c( "tomato", "skyblue"),
name="",
labels=c("Self Defined Ethnicity", "Officer Defined Ethicity"))
## `summarise()` has grouped output by 'classificaiton_type'. You can override using the `.groups` argument.
